phaletes <- read_excel("../data/New Home Study- SVOCs_University of Toronto_March2024.xlsx", sheet = "Passive Air - PDMS (pg.m-3)",range = "A5:N145")
## New names:
## • `` -> `...6`
colnames(phaletes)[which(names(phaletes) == "House ID")] <- "House_ID" #change column names
colnames(phaletes)[which(names(phaletes) == "Sample ID")] <- "Sample_ID" #change column names 
colnames(phaletes)[which(names(phaletes) == "Period (month)")] <- "Period"
phaletes_detected <- phaletes[phaletes$Period %in% c(0, 3, 6, 9, 12), ]
phaletes_filled <- phaletes_detected %>%
  mutate(
DEP = ifelse(DEP == "<DL", 50, as.numeric(DEP)),
DPP = ifelse(DPP == "<DL", 101, as.numeric(DPP)),
DiBP = ifelse(DiBP == "<DL", 80, as.numeric(DiBP)),
DnBP = ifelse(DnBP == "<DL", 103, as.numeric(DPP)),
BzBP = ifelse(BzBP == "<DL", 87, as.numeric(BzBP)),
DEHP = ifelse(DEHP == "<DL", 75, as.numeric(DEHP)),
DnOP = ifelse(DnOP == "<DL", 69, as.numeric(DnOP)),
DiNP = ifelse(DiNP == "<DL", 102, as.numeric(DiNP))
) %>% 
  mutate(Period = as.numeric(as.character(Period))) %>%
  arrange(House_ID)
## Warning: There were 7 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `DEP = ifelse(DEP == "<DL", 50, as.numeric(DEP))`.
## Caused by warning in `ifelse()`:
## ! NAs introduced by coercion
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 6 remaining warnings.
complete_houses <- phaletes_filled %>%
  group_by(House_ID) %>%
  filter(n_distinct(Period) == 5) %>%
  ungroup()
chemicals <- complete_houses[, c("House_ID","Period","DEP", "DPP", "DiBP", "DnBP", "BzBP", "DEHP", "DnOP","DiNP")]

aggregated_df <- chemicals %>%
  group_by(Period) %>%
  summarise(
    DEP = mean(DEP),
    DPP = mean(DPP),
    DiBP = mean(DiBP),
    DnBP = mean(DnBP),
    BzBP = mean(BzBP),
    DEHP = mean(DEHP),
    DnOP = mean(DEHP),
    DiNP = mean(DiNP))

  
  

# Convert the data frame to a time series object for each chemical
# the period column represents months with records at 0, 3, 6, 9, 12
# Set the start to the first period (month 0) of the first year and frequency to 5
chemicals_ts <- lapply(aggregated_df[, -1], function(x) ts(x, start = c(2020, 1), frequency = 5))  # 5 periods per year

# Normalize the data 
chemicals_ts <- lapply(chemicals_ts, scale)

# Function to plot cross-correlation for a given pair of chemicals
plot_ccf <- function(ts1, ts2, name1, name2) {
  ts1 <- as.numeric(ts1)  
  ts2 <- as.numeric(ts2)
  ccf_result <- ccf(ts1, ts2,lag = 5,plot = FALSE)
  print(ccf_result)
  plot(ccf_result, main = paste("Cross-Correlation between", name1, "and", name2))
}

# Loop through each pair of chemicals and plot cross-correlation
chemical_names <- colnames(aggregated_df)[-1]
for(i in 1:(length(chemicals_ts) - 1)) {
  for(j in (i + 1):length(chemicals_ts)) {
    plot_ccf(chemicals_ts[[i]], chemicals_ts[[j]], chemical_names[i], chemical_names[j])
  }
}
## 
## Autocorrelations of series 'X', by lag
## 
##     -4     -3     -2     -1      0      1      2      3      4 
##  0.109 -0.175 -0.399 -0.284  0.803  0.592 -0.201 -0.425 -0.021

## 
## Autocorrelations of series 'X', by lag
## 
##     -4     -3     -2     -1      0      1      2      3      4 
## -0.066 -0.225 -0.359  0.065  0.715  0.688 -0.201 -0.587 -0.030

## 
## Autocorrelations of series 'X', by lag
## 
##     -4     -3     -2     -1      0      1      2      3      4 
##  0.074 -0.245 -0.376 -0.155  0.905  0.473 -0.231 -0.424 -0.021

## 
## Autocorrelations of series 'X', by lag
## 
##     -4     -3     -2     -1      0      1      2      3      4 
##  0.230 -0.046 -0.356 -0.633  0.613  0.544  0.031 -0.364 -0.019

## 
## Autocorrelations of series 'X', by lag
## 
##     -4     -3     -2     -1      0      1      2      3      4 
##  0.327  0.190 -0.189 -0.959  0.070  0.385  0.363 -0.177 -0.010

## 
## Autocorrelations of series 'X', by lag
## 
##     -4     -3     -2     -1      0      1      2      3      4 
##  0.327  0.190 -0.189 -0.959  0.070  0.385  0.363 -0.177 -0.010

## 
## Autocorrelations of series 'X', by lag
## 
##     -4     -3     -2     -1      0      1      2      3      4 
## -0.168 -0.331 -0.262  0.337  0.777  0.453 -0.148 -0.626 -0.032

## 
## Autocorrelations of series 'X', by lag
## 
##     -4     -3     -2     -1      0      1      2      3      4 
## -0.078 -0.246 -0.293  0.287  0.868  0.271 -0.634 -0.354  0.178

## 
## Autocorrelations of series 'X', by lag
## 
##     -4     -3     -2     -1      0      1      2      3      4 
##  0.087 -0.309 -0.444  0.288  0.980  0.001 -0.504 -0.226  0.127

## 
## Autocorrelations of series 'X', by lag
## 
##     -4     -3     -2     -1      0      1      2      3      4 
##  0.270 -0.118 -0.630 -0.213  0.915  0.253 -0.319 -0.272  0.113

## 
## Autocorrelations of series 'X', by lag
## 
##     -4     -3     -2     -1      0      1      2      3      4 
##  0.384  0.131 -0.596 -0.710  0.494  0.437  0.050 -0.251  0.060

## 
## Autocorrelations of series 'X', by lag
## 
##     -4     -3     -2     -1      0      1      2      3      4 
##  0.384  0.131 -0.596 -0.710  0.494  0.437  0.050 -0.251  0.060

## 
## Autocorrelations of series 'X', by lag
## 
##     -4     -3     -2     -1      0      1      2      3      4 
## -0.197 -0.341 -0.050  0.512  0.704  0.118 -0.531 -0.406  0.192

## 
## Autocorrelations of series 'X', by lag
## 
##     -4     -3     -2     -1      0      1      2      3      4 
##  0.121 -0.447 -0.521  0.418  0.854  0.228 -0.326 -0.249 -0.077

## 
## Autocorrelations of series 'X', by lag
## 
##     -4     -3     -2     -1      0      1      2      3      4 
##  0.376 -0.220 -0.746 -0.067  0.663  0.394 -0.152 -0.178 -0.068

## 
## Autocorrelations of series 'X', by lag
## 
##     -4     -3     -2     -1      0      1      2      3      4 
##  0.534  0.103 -0.705 -0.607  0.182  0.454  0.106 -0.031 -0.037

## 
## Autocorrelations of series 'X', by lag
## 
##     -4     -3     -2     -1      0      1      2      3      4 
##  0.534  0.103 -0.705 -0.607  0.182  0.454  0.106 -0.031 -0.037

## 
## Autocorrelations of series 'X', by lag
## 
##     -4     -3     -2     -1      0      1      2      3      4 
## -0.274 -0.434 -0.077  0.484  0.920  0.261 -0.430 -0.335 -0.116

## 
## Autocorrelations of series 'X', by lag
## 
##     -4     -3     -2     -1      0      1      2      3      4 
##  0.268 -0.101 -0.564 -0.363  0.859  0.357 -0.219 -0.316  0.077

## 
## Autocorrelations of series 'X', by lag
## 
##     -4     -3     -2     -1      0      1      2      3      4 
##  0.381  0.153 -0.485 -0.825  0.381  0.438  0.155 -0.241  0.041

## 
## Autocorrelations of series 'X', by lag
## 
##     -4     -3     -2     -1      0      1      2      3      4 
##  0.381  0.153 -0.485 -0.825  0.381  0.438  0.155 -0.241  0.041

## 
## Autocorrelations of series 'X', by lag
## 
##     -4     -3     -2     -1      0      1      2      3      4 
## -0.195 -0.351 -0.121  0.478  0.756  0.230 -0.428 -0.498  0.130

## 
## Autocorrelations of series 'X', by lag
## 
##     -4     -3     -2     -1      0      1      2      3      4 
##  0.338 -0.077 -0.508 -0.531  0.798  0.284 -0.082 -0.351  0.127

## 
## Autocorrelations of series 'X', by lag
## 
##     -4     -3     -2     -1      0      1      2      3      4 
##  0.338 -0.077 -0.508 -0.531  0.798  0.284 -0.082 -0.351  0.127

## 
## Autocorrelations of series 'X', by lag
## 
##     -4     -3     -2     -1      0      1      2      3      4 
## -0.173 -0.202  0.084  0.522  0.449 -0.230 -0.562 -0.291  0.404

## 
## Autocorrelations of series 'X', by lag
## 
##     -4     -3     -2     -1      0      1      2      3      4 
##  0.181 -0.358 -0.302 -0.021  1.000 -0.021 -0.302 -0.358  0.181

## 
## Autocorrelations of series 'X', by lag
## 
##     -4     -3     -2     -1      0      1      2      3      4 
## -0.093  0.054  0.287  0.382 -0.066 -0.697 -0.476  0.034  0.574

## 
## Autocorrelations of series 'X', by lag
## 
##     -4     -3     -2     -1      0      1      2      3      4 
## -0.093  0.054  0.287  0.382 -0.066 -0.697 -0.476  0.034  0.574

### Positive Lags (1, 2, 3): second series is lagged behind the first series ### Negative Lags (-1, -2, -3): first series is lagged behind the second series

  1. DEP & DnBP
  2. DEP &DEHP
  3. DEP & DnOP
  4. DPP & DiBP
  5. DPP & DNBP ***
  6. DPP & BzBP
  7. DiBP & DiNP
  8. DEHP & DnOP ***
# Loop through each pair of chemicals and plot cross-correlation
chemical_columns <- colnames(phaletes_filled)[7:14] # Adjust this based on the actual column indices
phaletes_normalized <- phaletes_filled
phaletes_normalized[chemical_columns] <- lapply(phaletes_filled[chemical_columns], scale)


# Loop through each pair of chemicals and plot cross-correlation
for (i in 1:(length(chemical_columns) - 1)) {
  for (j in (i + 1):length(chemical_columns)) {
    p <- ggplot(phaletes_normalized, aes_string(x = chemical_columns[i], y = chemical_columns[j])) + 
      geom_point(color = "#69b3a2") +
      geom_smooth(method = "lm", color = "red", fill = "#69b3a2", se = TRUE) +
      theme_ipsum() +
      ggtitle(paste("Scatter Plot with Linear Regression between", chemical_columns[i], "and", chemical_columns[j]))
    print(p)
  }
}
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `geom_smooth()` using formula = 'y ~ x'

## `geom_smooth()` using formula = 'y ~ x'

## `geom_smooth()` using formula = 'y ~ x'

## `geom_smooth()` using formula = 'y ~ x'

## `geom_smooth()` using formula = 'y ~ x'

## `geom_smooth()` using formula = 'y ~ x'

## `geom_smooth()` using formula = 'y ~ x'

## `geom_smooth()` using formula = 'y ~ x'

## `geom_smooth()` using formula = 'y ~ x'

## `geom_smooth()` using formula = 'y ~ x'

## `geom_smooth()` using formula = 'y ~ x'

## `geom_smooth()` using formula = 'y ~ x'

## `geom_smooth()` using formula = 'y ~ x'

## `geom_smooth()` using formula = 'y ~ x'

## `geom_smooth()` using formula = 'y ~ x'

## `geom_smooth()` using formula = 'y ~ x'

## `geom_smooth()` using formula = 'y ~ x'

## `geom_smooth()` using formula = 'y ~ x'

## `geom_smooth()` using formula = 'y ~ x'

## `geom_smooth()` using formula = 'y ~ x'

## `geom_smooth()` using formula = 'y ~ x'

## `geom_smooth()` using formula = 'y ~ x'

## `geom_smooth()` using formula = 'y ~ x'

## `geom_smooth()` using formula = 'y ~ x'

## `geom_smooth()` using formula = 'y ~ x'

## `geom_smooth()` using formula = 'y ~ x'

## `geom_smooth()` using formula = 'y ~ x'

## `geom_smooth()` using formula = 'y ~ x'

### 1. DPP and DnBP are significantly correlated 2. (DnOP and DiNP), (DPP, DiNP)are weakly correlated 3.